/******************************************************************************
This file cleans the baseline test score data.
--------------------------------------------------------------------------------
Input:			Raw 3rd - 8th grade standardized test data sets
					> {yyyy}-{yy}_Student_test-Biog_All_G38_NYC_Scrambled.dta
--------------------------------------------------------------------------------
Intermediate:	Appended Raw 3rd - 8th grade standardized test data sets
					> baseline_scores_standardized_all_appended.dta
--------------------------------------------------------------------------------
Output:			Cleaned standardized test data sets, one wide file per grade (g = 6, 7, 8)
					> baseline_scores_standardized_grade{g}.dta
********************************************************************************/

	clear all
	// Run flag: 1 rebuilds the appended intermediate file from the raw yearly
	// files; 0 skips the rebuild and reuses the file already saved on disk.
	local append 1

// Append all files
if `append' == 1 {

	// Seed the accumulator with a truly empty dataset (0 obs, 0 vars) so no
	// placeholder observation leaks into the appended data. The tempfile gets
	// its own name so it does not overwrite the `append' run flag.
	tempfile stacked
	save `stacked', emptyok

	// Loop over years; `yr' is the first calendar year in the raw file name
	forval yr = 2005/2016 {

		// Second calendar year in the file name, also used as the exam year
		local next_year = `yr' + 1
		// Zero-padded two-digit suffix of that year (2005 -> "06")
		local digits_plus : display %02.0f mod(`next_year', 100)

		// Load raw file
		use "${cleandata}outcomes/TestBiog/`yr'-`digits_plus'_Student_test-Biog_All_G38_NYC_Scrambled", clear

		// Score fields may be stored as strings; `force' turns non-numeric
		// raw-score entries into missing values.
		destring *_raw_score, replace force
		destring *_test_grade, replace
		destring *_scale_score, replace

		* Harmonize variable names
		// Renaming here lets every later command use the exact name `stu'
		// instead of relying on variable-name abbreviation.
		ren student_id_scram stu

		// Trim to the needed columns before appending: keeps the accumulator
		// small and avoids type conflicts on columns that are never used.
		keep stu *_raw_score *_test_grade *_scale_score

		// Generate spring year indicator
		gen bl_exam_year = `next_year'

		append using `stacked'

		save `stacked', replace

	}

	// Reshape long by test x student x year.
	// The stub prefix (everything before _raw_score etc.) becomes `subject'.
	reshape long @_raw_score @_test_grade @_scale_score, i(stu bl_exam_year) j(subject) string
	// Strip the leading underscore left on the reshaped variable names
	ren _* *

	sa "${cleandata}baseline_scores_standardized_all_appended.dta", replace

}


// Reload the appended long file from disk so this section also runs when the
// append step above is switched off.
use "${cleandata}baseline_scores_standardized_all_appended.dta", clear


********************************************************************
******** Build one wide, standardized file per tested grade (6, 7, 8)
********************************************************************

forvalues grade = 6/8 {
	preserve
		// Restrict to this grade's tests that have a raw score
		keep if test_grade == `grade' & raw_score != .

		* Keep only each student's latest exam year per subject
		bys stu subject: egen last_year = max( bl_exam_year )
		keep if bl_exam_year == last_year
		drop last_year

		* Z-score the scale score within subject x exam year x grade
		// NOTE(review): moments are computed on the sample left after the
		// filters above, not on all test takers -- confirm this is intended.
		bys subject bl_exam_year test_grade: egen ss_mu = mean(scale_score)
		bys subject bl_exam_year test_grade: egen ss_sigma = sd(scale_score)
		gen bl_ss = (scale_score - ss_mu) / ss_sigma
		drop ss_mu ss_sigma

		// Prefix with "_" so the wide names read <variable>_<subject>
		replace subject = "_" + subject

		* One row per student, one set of columns per subject
		reshape wide bl_exam_year test_grade raw_score bl_ss scale_score, i(stu) j(subject) string

		sa "${cleandata}baseline_scores_standardized_grade`grade'.dta", replace

	restore
}
